{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import numpy as np\n",
    "import librosa\n",
    "from tqdm import tqdm\n",
    "dataset_path = \"data/\"\n",
    "test_list = \"testing_list.txt\"\n",
    "valid_list = \"validation_list.txt\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(test_list, \"r\") as f:\n",
    "    test_file = f.readlines()\n",
    "for i in range(len(test_file)):\n",
    "    test_file[i] = test_file[i].strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'bed': 0,\n",
       " 'wow': 1,\n",
       " 'five': 2,\n",
       " 'left': 3,\n",
       " 'down': 4,\n",
       " 'forward': 5,\n",
       " 'sheila': 6,\n",
       " 'go': 7,\n",
       " 'dog': 8,\n",
       " 'up': 9,\n",
       " 'zero': 10,\n",
       " 'yes': 11,\n",
       " 'learn': 12,\n",
       " 'two': 13,\n",
       " 'right': 14,\n",
       " 'four': 15,\n",
       " 'eight': 16,\n",
       " 'cat': 17,\n",
       " 'happy': 18,\n",
       " 'six': 19,\n",
       " 'tree': 20,\n",
       " 'bird': 21,\n",
       " 'follow': 22,\n",
       " 'marvin': 23,\n",
       " 'on': 24,\n",
       " 'stop': 25,\n",
       " 'one': 26,\n",
       " 'visual': 27,\n",
       " 'house': 28,\n",
       " 'off': 29,\n",
       " 'seven': 30,\n",
       " 'nine': 31,\n",
       " 'no': 32,\n",
       " 'three': 33,\n",
       " 'backward': 34}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# preoapre for the label list\n",
    "targets = os.listdir(dataset_path)\n",
    "for t in targets:\n",
    "    if t.startswith(\".\"):\n",
    "        targets.remove(t)\n",
    "target_map = {}\n",
    "for i, t in enumerate(targets):\n",
    "    target_map[t] = i\n",
    "target_map"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 36/36 [00:00<00:00, 290.20it/s]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "105829"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# prepare for the training files\n",
    "train_file = []\n",
    "for t in tqdm(os.listdir(dataset_path)):\n",
    "    for st in os.listdir(os.path.join(dataset_path, t)):\n",
    "        train_file.append(os.path.join(t, st))\n",
    "len(train_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "94824"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train_file = list(set(train_file) - set(test_file))\n",
    "len(train_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 11005/11005 [00:00<00:00, 13387.43it/s]\n"
     ]
    }
   ],
   "source": [
    "output_dicts = []\n",
    "for f in tqdm(test_file):\n",
    "    y, sr = librosa.load(os.path.join(dataset_path, f), sr = None)\n",
    "    temp_dict = {\n",
    "        \"name\": f,\n",
    "        \"target\": target_map[f.split(\"/\")[0]],\n",
    "        \"waveform\": y\n",
    "    }\n",
    "    output_dicts.append(temp_dict)\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.save(\"scv2_test.npy\", output_dicts)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
